---
title: "ITaP Data Analysis"
output:
  html_document: default
  word_document: default
editor_options:
  chunk_output_type: inline
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r, error = FALSE, warning=FALSE, message=FALSE}
rm(list=ls()) # clear environment
set.seed(123) # ensure reproducible code
# packages needed:
library(readxl) # needed for reading in .xlsx files 
library(ggplot2) # used for plotting
library(BayesFactor) # for bayesian analysis 
library(readxl) # needed for reading in .xlsx files 
library(dplyr) # needed for data manipulation
library(afex) # for convenience functions for analyzing factorial experiments
library(esci) #for calculating effect sizes and CIs

# read in the full dataset (146 obs of 107 variables)
data <- read.csv('ITaPDataset_v11_anonymised.csv')

```

Standardising outcome variables using baseline means and SDs from each respective ITaP session (unless otherwise specified)

```{r}

#self efficacy data
data$zITaP3EffBaseScore <- scale(data$ITaP3EffBaseScore)
data$zITaP4EffBaseScore <- scale(data$ITaP4EffBaseScore)
data$zITaP3EffEndScore <- ((data$ITaP3EffEndScore - mean(data$ITaP3EffBaseScore, na.rm = TRUE)) / sd(data$ITaP3EffBaseScore, na.rm = TRUE))
data$zITaP4EffEndScore <- ((data$ITaP4EffEndScore - mean(data$ITaP4EffBaseScore, na.rm = TRUE)) / sd(data$ITaP4EffBaseScore, na.rm = TRUE))

#quizzes
data$zITaP4QuizzesBaseScore <- scale(data$ITaP4ScaffBaseScore) #note no baseline quiz scores available for ITaP3
data$zITaP3QuizzesEndScore <- scale(data$ITaP3QuesEndScore) #standardising endline scores based on endline data due to lack of ITaP3 baseline data
data$zITaP4QuizzesEndScore <- scale(data$ITaP4ScaffEndScore) #standardising endline scores based on endline data due to lack of ITaP3 baseline data

#digital approximations
data$zITaP3DA1BaseScore <- scale(data$ITaP3DA1BaseScore)
data$zITaP3DA2BaseScore <- scale(data$ITaP3DA2BaseScore)
data$zITaP3DA3BaseScore <- scale(data$ITaP3DA3BaseScore)
data$zITaP3DA1EndScore <- ((data$ITaP3DA1EndScore - mean(data$ITaP3DA1BaseScore, na.rm = TRUE)) / sd(data$ITaP3DA1BaseScore, na.rm = TRUE))
data$zITaP3DA2EndScore <- ((data$ITaP3DA2EndScore - mean(data$ITaP3DA2BaseScore, na.rm = TRUE)) / sd(data$ITaP3DA2BaseScore, na.rm = TRUE))
data$zITaP3DA3EndScore <- ((data$ITaP3DA3EndScore - mean(data$ITaP3DA3BaseScore, na.rm = TRUE)) / sd(data$ITaP3DA3BaseScore, na.rm = TRUE))

data$ITaP3DAAverageBaseScore <- 
  rowMeans(data[c("zITaP3DA1BaseScore","zITaP3DA2BaseScore","zITaP3DA3BaseScore")], na.rm = TRUE)
data$ITaP3DAAverageEndScore <- 
  rowMeans(data[c("zITaP3DA1EndScore","zITaP3DA2EndScore","zITaP3DA3EndScore")], na.rm = TRUE)

#lesson plans
data$zITaP3LessonBaseScore <- scale(data$ITaP3LessonBaseScore)
data$zITaP4LessonBaseScore <- scale(data$ITaP4LessonBaseScore)
data$zITaP3LessonEndScore <- ((data$ITaP3LessonEndScore - mean(data$ITaP3LessonBaseScore, na.rm = TRUE)) / sd(data$ITaP3LessonBaseScore, na.rm = TRUE))
data$zITaP4LessonEndScore <- ((data$ITaP4LessonEndScore - mean(data$ITaP4LessonBaseScore, na.rm = TRUE)) / sd(data$ITaP4LessonBaseScore, na.rm = TRUE))
```

Creating new standardised outcome variables based on modality rather than ITaP session

```{r}
#self-efficacy data
data$HybridEffBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP3EffBaseScore, 
                              data$zITaP4EffBaseScore)
data$InpersonEffBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4EffBaseScore, 
                              data$zITaP3EffBaseScore)
data$HybridEffEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP3EffEndScore, 
                              data$zITaP4EffEndScore)
data$InpersonEffEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4EffEndScore, 
                              data$zITaP3EffEndScore)
#quiz data
data$HybridQuizBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              NA, 
                              data$zITaP4QuizzesBaseScore)
data$InpersonQuizBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4QuizzesBaseScore, 
                              NA)
data$HybridQuizEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP3QuizzesEndScore, 
                              data$zITaP4QuizzesEndScore)
data$InpersonQuizEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4QuizzesEndScore, 
                              data$zITaP3QuizzesEndScore)

#lesson plans data
data$HybridLessonBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP3LessonBaseScore, 
                              data$zITaP4LessonBaseScore)
data$InpersonLessonBaseScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4LessonBaseScore, 
                              data$zITaP3LessonBaseScore)
data$HybridLessonEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP3LessonEndScore, 
                              data$zITaP4LessonEndScore)
data$InpersonLessonEndScore <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$zITaP4LessonEndScore, 
                              data$zITaP3LessonEndScore)
```

## Self-Efficacy: Full Analysis using recoded, standardised data

These analyses are done on the 63 trainees for whom we have data at both ITaP 3 and 4 (and therefore in person and hybrid) as well as at both timepoints

```{r}

# grab indices of the 63 individuals that have data on all measures
allID_eff <- ((!is.na(as.numeric(data$HybridEffBaseScore))) + (!is.na(as.numeric(data$HybridEffEndScore))) + (!is.na(as.numeric(data$InpersonEffBaseScore))) + (!is.na(as.numeric(data$InpersonEffEndScore)))) == 4

# collect data for all individuals in this analysis  
HybridEffBase_full <- data$HybridEffBaseScore[allID_eff]
HybridEffEnd_full <- data$HybridEffEndScore[allID_eff]
InpersonEffBase_full <- data$InpersonEffBaseScore[allID_eff]
InpersonEffEnd_full <- data$InpersonEffEndScore[allID_eff]
HybridEffCombined_full <- ((data$HybridEffBaseScore[allID_eff] + data$HybridEffEndScore[allID_eff]) / 2) #used for calculating effect size of modality main effect
InpersonEffCombined_full <- ((data$InpersonEffBaseScore[allID_eff] + data$InpersonEffEndScore[allID_eff]) / 2) #used for calculating effect size of modality main effect
BaselineEffCombined_full <- ((data$HybridEffBaseScore[allID_eff] + data$InpersonEffBaseScore[allID_eff]) / 2) #used for calculating effect size of timepoint main effect
EndlineEffCombined_full <- ((data$HybridEffEndScore[allID_eff] + data$InpersonEffEndScore[allID_eff]) / 2) #used for calculating effect size of timepoint main effect
ID <- data$ID[allID_eff]

# collate data into a single object 
effData <- data.frame(effScores = c(HybridEffBase_full,HybridEffEnd_full,InpersonEffBase_full,InpersonEffEnd_full), 
      ID = rep(ID,4), 
      Timepoint = as.factor(c(rep("Baseline", length(ID)), rep("Endline",length(ID)), rep("Baseline",length(ID)), rep("Endline", length(ID)))), 
      Modality = as.factor(c(rep("Hybrid", length(ID)), rep("Hybrid",length(ID)), rep("In-person",length(ID)), rep("In-person", length(ID)))))
```

```{r}
# Display table of means and sds
eff_tibble_full <- effData %>%
  group_by(Modality, Timepoint) %>%
  summarize(round(mean(effScores),2),sd(effScores)) %>%
  ungroup()
print(eff_tibble_full)

# Produce plot
eff_pointplot <- ggplot(effData, aes(x = Timepoint, y = effScores, color = Modality, group = Modality)) +
  stat_summary(fun = mean, geom = "point", position = position_dodge(width = 0.8), size = 3) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.8), width = 0.2) +
  stat_summary(fun = mean, geom = "line", aes(group = Modality), position = position_dodge(width = 0.8)) +
  theme_classic() +
  scale_color_manual(values = c("In-person" = "navy", "Hybrid" = "coral3")) +
  xlab("Timepoint") +
  ylab("Mean Self-Efficacy") +
  theme(legend.title = element_blank())
ggsave(filename = "eff_fulldata.jpg", plot = eff_pointplot, dpi = 300, quality = 100)

# run 2 (modality: in person, hybrid) x 2 (timepoint: baseline, endline) repeated measures ANOVA and print summary of results 
eff_anova <- afex::aov_car(effScores ~ Modality * Timepoint + Error(ID/(Modality*Timepoint)), data = effData, type = 3)
summary(eff_anova)

#Calculate effect sizes and CIs
#Modality (main effect)
cohend_effmodality <- estimate_mdiff_paired(comparison_mean=mean(InpersonEffCombined_full), comparison_sd=sd(InpersonEffCombined_full), reference_mean=mean(HybridEffCombined_full), reference_sd=sd(HybridEffCombined_full), n=63, correlation=cor(InpersonEffCombined_full, HybridEffCombined_full), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_effmodality_d <- cohend_effmodality$es_smd$effect_size[1]
cohend_effmodality_LL <- cohend_effmodality$es_smd$LL[1]
cohend_effmodality_UL <- cohend_effmodality$es_smd$UL[1]

#Timepoint (main effect)
cohend_efftimepoint <- estimate_mdiff_paired(
comparison_mean=mean(EndlineEffCombined_full), comparison_sd=sd(EndlineEffCombined_full), reference_mean=mean(BaselineEffCombined_full), reference_sd=sd(BaselineEffCombined_full),n=63, correlation=cor(EndlineEffCombined_full, BaselineEffCombined_full), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_efftimepoint_d <- cohend_efftimepoint$es_smd$effect_size[1]
cohend_efftimepoint_LL <- cohend_efftimepoint$es_smd$LL[1]
cohend_efftimepoint_UL <- cohend_efftimepoint$es_smd$UL[1]

#Modality*Timepoint (interaction)
diffs_effinperson <- InpersonEffEnd_full - InpersonEffBase_full #difference of differences
diffs_effhybrid <- HybridEffEnd_full - HybridEffBase_full #difference of differences

cohend_effinteract <- estimate_mdiff_paired(
comparison_mean=mean(diffs_effinperson),
comparison_sd=sd(diffs_effinperson), 
reference_mean=mean(diffs_effhybrid), 
reference_sd=sd(diffs_effhybrid),
n=63, correlation=cor(diffs_effinperson, diffs_effhybrid), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_effinteract_d <- cohend_effinteract$es_smd$effect_size[1]
cohend_effinteract_LL <- cohend_effinteract$es_smd$LL[1]
cohend_effinteract_UL <- cohend_effinteract$es_smd$UL[1]

# Print the result
print(paste("Cohen's d (Main effect of modality, In-person vs. Hybrid):", 
round(cohend_effmodality_d, 2), " [95% CI:", round(cohend_effmodality_LL, 2), ",", round(cohend_effmodality_UL, 2),"]"))

print(paste("Cohen's d (Main effect of timepoint, Endline vs. Baseline):", 
round(cohend_efftimepoint_d, 2), " [95% CI:", round(cohend_efftimepoint_LL, 2), ",", round(cohend_efftimepoint_UL, 2),"]"))

print(paste("Cohen's d (Overall interaction):", round(cohend_effinteract_d, 2), 
" [95% CI:", round(cohend_effinteract_LL, 2), ",", round(cohend_effinteract_UL, 2), "]"))
```

```{r}
# run Bayesian version of analysis above 
BfData <- effData
BfData$Modality = as.factor(BfData$Modality)
BfData$Timepoint <- as.factor(BfData$Timepoint)
BfData$ID <- as.factor(BfData$ID)
bf <- anovaBF(effScores ~ Timepoint*Modality +ID , data=BfData, whichRandom = 'ID')
print(bf) # print the results of the bayesian analysis
bf[4] / bf[3] # print the bayes factor for the interaction effect 
```

## Quizzes: *Endline-only* analysis using recoded, standardised data (full analysis not possible, due to loss of ITaP3 baseline data)

These analyses are done on the 78 trainees for whom we have endline quiz data at both ITaP 3 and 4 (and therefore in person and hybrid)

```{r}

# grab indices of the 78 individuals that have data on both endlines
allendlineID_quiz <- ((!is.na(as.numeric(data$HybridQuizEndScore))) + (!is.na(as.numeric(data$InpersonQuizEndScore)))) == 2

# collect data for all individuals in this analysis  
HybridQuizEnd_endlineanalysis <- data$HybridQuizEndScore[allendlineID_quiz]
InpersonQuizEnd_endlineanalysis <- data$InpersonQuizEndScore[allendlineID_quiz]
ID_quizendline <- data$ID[allendlineID_quiz]

# collate data into a single object 
quizData_endline <- data.frame(quizScores = c(HybridQuizEnd_endlineanalysis,InpersonQuizEnd_endlineanalysis), 
      ID_quizendline = rep(ID_quizendline,2), 
      Modality = as.factor(c(rep("Hybrid", length(ID_quizendline)), rep("In-person", length(ID_quizendline)))))
```

```{r}

# Display table of means and sds
quiz_tibble_endline <- quizData_endline %>%
  group_by(Modality) %>%
  summarize(round(mean(quizScores),2),sd(quizScores)) %>%
  ungroup()
print(quiz_tibble_endline)

# produce plot
quiz_pointplot_endline <- ggplot(quizData_endline, aes(x=Modality, y=quizScores)) +
  stat_summary(fun=mean, geom="point", position=position_dodge(width=0.8), size=3) +
  stat_summary(fun.data=mean_cl_normal, geom="errorbar", position=position_dodge(width=0.8), width=0.2) +
  theme_classic() + 
  xlab("School Visit Modality") + 
  ylab("Mean Quiz score") +
  theme(legend.title=element_blank())
quiz_pointplot_endline # print point plot
ggsave(filename = "quiz_endlinedata.jpg", plot = quiz_pointplot_endline, dpi = 300, quality = 100)

# run one-sided t-test on endline data (modality: in person, hybrid) and print 
quiz_ttest_result <- t.test(InpersonQuizEnd_endlineanalysis, HybridQuizEnd_endlineanalysis, 
                        alternative = "greater", var.equal = TRUE, paired = TRUE)
print(quiz_ttest_result)

# Calculate effect size and CIs
cohend_quizendline <- estimate_mdiff_paired(
comparison_mean=mean(InpersonQuizEnd_endlineanalysis),
comparison_sd=sd(InpersonQuizEnd_endlineanalysis), reference_mean=mean(HybridQuizEnd_endlineanalysis),
reference_sd=sd(HybridQuizEnd_endlineanalysis), n=78, 
correlation=cor(InpersonQuizEnd_endlineanalysis, HybridQuizEnd_endlineanalysis), 
conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_quizendline_d <- cohend_quizendline$es_smd$effect_size[1]
cohend_quizendline_LL <- cohend_quizendline$es_smd$LL[1]
cohend_quizendline_UL <- cohend_quizendline$es_smd$UL[1]

# Print the result
print(paste("Cohen's d:", round(cohend_quizendline_d, 2), " [95% CI:", round(cohend_quizendline_LL, 2), ",", round(cohend_quizendline_UL, 2),"]"))
```

Bayes version

```{r}
ttestBF(InpersonQuizEnd_endlineanalysis, HybridQuizEnd_endlineanalysis,
        paired=TRUE)
```

## Digital Approximations: *Exploratory analysis*, comparing standardised ITaP3 baseline-endline approximation data (N = 56) (full analysis not possible, due to loss of ITaP4 endline data)

```{r}

# grab indices of the 56 individuals that have approximation data at both ITaP3 timepoints AND who completed all three approximations at both times
allitap3ID_da <- ifelse(!is.na(data$N_ITaP3DAsCompleted_Base) & !is.na(data$N_ITaP3DAsCompleted_End), 
                        data$N_ITaP3DAsCompleted_Base == 3 & data$N_ITaP3DAsCompleted_End == 3, 
                        FALSE)

# collect data for all individuals in this analysis  
ITaP3DABase_ITaP3analysis <- data$ITaP3DAAverageBaseScore[allitap3ID_da]
ITaP3DAEnd_ITaP3analysis <- data$ITaP3DAAverageEndScore[allitap3ID_da]
ID_daITaP3 <- data$ID[allitap3ID_da]
ITaP3Mod_daITaP3 <- data$ITaP3Mod[allitap3ID_da]

# collate data into a single object 
daData_ITaP3 <- data.frame(daScores = c(ITaP3DABase_ITaP3analysis,ITaP3DAEnd_ITaP3analysis), 
      ID_daITaP3 = rep(ID_daITaP3,2), 
      Timepoint = as.factor(c(rep("Baseline", length(ID_daITaP3)), rep("Endline", length(ID_daITaP3)))),
      ITaP3Mod_daITaP3 = rep(ITaP3Mod_daITaP3,2))

# Display table of means and sds
da_tibble_itap3 <- daData_ITaP3 %>%
  group_by(ITaP3Mod_daITaP3, Timepoint) %>%
  summarize(round(mean(daScores),2),sd(daScores)) %>%
  ungroup()
print(da_tibble_itap3)

# produce plot
daitap3_pointplot <- ggplot(daData_ITaP3, aes(x=Timepoint, y=daScores, colour = ITaP3Mod_daITaP3, group = ITaP3Mod_daITaP3)) +
  stat_summary(fun=mean, geom="point", position=position_dodge(width=0.8), size=3) +
  stat_summary(fun.data=mean_cl_normal, geom="errorbar", position=position_dodge(width=0.8), width=0.2) +
  stat_summary(fun = mean, geom = "line", aes(group = ITaP3Mod_daITaP3), position = position_dodge(width = 0.8))+
  theme_classic() + 
  scale_color_manual(values = c("In-person" = "navy", "Hybrid" = "coral3")) +
  xlab("Timepoint") + 
  ylab("Mean Digital Approximation Score") +
  theme(legend.title=element_blank())
daitap3_pointplot # print point plot
ggsave(filename = "da_itap3data.jpg", plot = daitap3_pointplot, dpi = 300, quality = 100)
```

```{r}
# Perform an exploratory mixed-factor ANOVA and produce table of cell means

anova_daitap3 <- afex::aov_car(daScores ~ ITaP3Mod_daITaP3 * Timepoint + Error(ID_daITaP3/Timepoint), data = daData_ITaP3, type = 3)
anova_daitap3
summary(anova_daitap3)


#Effect sizes DA

#Modality (main effect)
inperson_data <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3== "In-person") %>% dplyr::select(daScores)
hybrid_data <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3 == "Hybrid") %>% dplyr::select(daScores)

cohend_damodality <- estimate_mdiff_two(
comparison_mean=mean(inperson_data$daScores),
comparison_sd=sd(inperson_data$daScores), comparison_n = 54, 
reference_mean = mean(hybrid_data$daScores), 
reference_sd = sd(hybrid_data$daScores),reference_n = 58, 
grouping_variable_levels = c("inperson", "hybrid"), 
conf_level = 0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_damodality_d <- cohend_damodality$es_smd$effect_size[1]
cohend_damodality_LL <- cohend_damodality$es_smd$LL[1]
cohend_damodality_UL <- cohend_damodality$es_smd$UL[1]

                   
#Timepoint (main effect)
baseline_data <- daData_ITaP3 %>% dplyr::filter(Timepoint == "Baseline") %>% dplyr::select(daScores)
endline_data <- daData_ITaP3 %>% dplyr::filter(Timepoint== "Endline") %>% dplyr::select(daScores)

cohend_datimepoint <- estimate_mdiff_paired(
  comparison_mean = mean(endline_data$daScores), 
  comparison_sd = sd(endline_data$daScores), 
  reference_mean = mean(baseline_data$daScores), 
  reference_sd = sd(baseline_data$daScores), 
  n = 56, 
  correlation = cor(endline_data$daScores, baseline_data$daScores), 
  conf_level = 0.95
)

# Extract Cohen's d (d_average), LL, and UL
cohend_datimepoint_d <- cohend_datimepoint$es_smd$effect_size[1]
cohend_datimepoint_LL <- cohend_datimepoint$es_smd$LL[1]
cohend_datimepoint_UL <- cohend_datimepoint$es_smd$UL[1]


#Modality*Timepoint (interaction)
# In person modality, time point simple effect
InPersonDABase <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3 == "In-person" &Timepoint == "Baseline") %>% dplyr::select(daScores)
InPersonDAEnd <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3 == "In-person" &Timepoint == "Endline") %>% dplyr::select(daScores)
diffs_dainperson <-InPersonDAEnd - InPersonDABase

# Hybrid modality, time point simple effect
HybridDABase <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3 == "Hybrid" &Timepoint == "Baseline") %>% dplyr::select(daScores)
HybridDAEnd <- daData_ITaP3 %>% dplyr::filter(ITaP3Mod_daITaP3 == "Hybrid" &Timepoint == "Endline") %>% dplyr::select(daScores)
diffs_dahybrid <- HybridDAEnd - HybridDABase

cohend_dainteract <- estimate_mdiff_two(
  comparison_mean=mean(diffs_dainperson$daScores),
  comparison_sd=sd(diffs_dainperson$daScores), comparison_n = 27, 
  reference_mean = mean(diffs_dahybrid$daScores), 
  reference_sd = sd(diffs_dahybrid$daScores),reference_n = 29, 
  grouping_variable_levels = c("inperson", "hybrid"), 
  conf_level = 0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_dainteract_d <- cohend_dainteract$es_smd$effect_size[1]
cohend_dainteract_LL <- cohend_dainteract$es_smd$LL[1]
cohend_dainteract_UL <- cohend_dainteract$es_smd$UL[1]


#Print results
print(paste("Cohen's d (Main effect of modality, In-person vs. Hybrid):", round(cohend_damodality_d, 2), " [95% CI:", round(cohend_damodality_LL, 2),",", round(cohend_damodality_UL, 2), "]"))

print(paste("Cohen's d (Main effect of timepoint, Endline vs. Baseline):", round(cohend_datimepoint_d, 2), " [95% CI:", round(cohend_datimepoint_LL, 2),",", round(cohend_datimepoint_UL, 2), "]"))

print(paste("Cohen's d (Overall interaction):", 
round(cohend_dainteract_d, 2), " [95% CI:", round(cohend_dainteract_LL, 2),
",", round(cohend_dainteract_UL, 2), "]"))

```

## Lesson plans: Full Analysis using recoded, standardised data

These analyses are done on the 76 trainees for whom we have data at both ITaP 3 and 4 (and therefore in person and hybrid) as well as at both timepoints.

```{r}

# grab indices of the 76 individuals that have data on all measures
allID_lesson <- ((!is.na(as.numeric(data$HybridLessonBaseScore))) + (!is.na(as.numeric(data$HybridLessonEndScore))) + (!is.na(as.numeric(data$InpersonLessonBaseScore))) + (!is.na(as.numeric(data$InpersonLessonEndScore)))) == 4

# collect data for all individuals in this analysis  
HybridLessonBase_full <- data$HybridLessonBaseScore[allID_lesson]
HybridLessonEnd_full <- data$HybridLessonEndScore[allID_lesson]
InpersonLessonBase_full <- data$InpersonLessonBaseScore[allID_lesson]
InpersonLessonEnd_full <- data$InpersonLessonEndScore[allID_lesson]
HybridLessonCombined_full <- ((data$HybridLessonBaseScore[allID_lesson] + data$HybridLessonEndScore[allID_lesson]) / 2) #used for calculating effect size of modality main effect
InpersonLessonCombined_full <- ((data$InpersonLessonBaseScore[allID_lesson] + data$InpersonLessonEndScore[allID_lesson]) / 2) #used for calculating effect size of modality main effect
BaselineLessonCombined_full <- ((data$HybridLessonBaseScore[allID_lesson] + data$InpersonLessonBaseScore[allID_lesson]) / 2) #used for calculating effect size of timepoint main effect
EndlineLessonCombined_full <- ((data$HybridLessonEndScore[allID_lesson] + data$InpersonLessonEndScore[allID_lesson]) / 2) #used for calculating effect size of timepoint main effect
ID <- data$ID[allID_lesson]

# collate data into a single object 
lessonData <- data.frame(lessonScores = c(HybridLessonBase_full,HybridLessonEnd_full,InpersonLessonBase_full,InpersonLessonEnd_full), 
      ID = rep(ID,4), 
      Timepoint = as.factor(c(rep("Baseline", length(ID)), rep("Endline",length(ID)), rep("Baseline",length(ID)), rep("Endline", length(ID)))), 
      Modality = as.factor(c(rep("Hybrid", length(ID)), rep("Hybrid",length(ID)), rep("In-person",length(ID)), rep("In-person", length(ID)))))
```

```{r}
# Display table of means and sds
lesson_tibble_full <- lessonData %>%
  group_by(Modality, Timepoint) %>%
  summarize(round(mean(lessonScores),2),sd(lessonScores)) %>%
  ungroup()
print(lesson_tibble_full)

# produce plot
lesson_pointplot <- ggplot(lessonData, aes(x = Timepoint, y = lessonScores, color = Modality, group = Modality)) +
  stat_summary(fun = mean, geom = "point", position = position_dodge(width = 0.8), size = 3) +
  stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.8), width = 0.2) +
  stat_summary(fun = mean, geom = "line", aes(group = Modality), position = position_dodge(width = 0.8)) +
  theme_classic() +
  scale_color_manual(values = c("In-person" = "navy", "Hybrid" = "coral3")) +
  xlab("Timepoint") +
  ylab("Mean Lesson Plan Score") +
  theme(legend.title = element_blank())
lesson_pointplot # print point plot
ggsave(filename = "lesson_full.jpg", plot = lesson_pointplot, dpi = 300, quality = 100)


# run 2 (modality: in person, hybrid) x 2 (timepoint: baseline, endline) repeated measures ANOVA and print summary of results 
lesson_anova <- afex::aov_car(lessonScores ~ Modality * Timepoint + Error(ID/(Modality*Timepoint)), data = lessonData, type = 3)
summary(lesson_anova)

# run t-tests for post-hoc pairwise comparisons and print 
lesson_ttest_modalitybaseline2 <- t.test(HybridLessonBase_full, InpersonLessonBase_full, 
                        alternative = "two.sided", var.equal = TRUE, paired = TRUE)
lesson_ttest_modalityendline2 <- t.test(HybridLessonEnd_full, InpersonLessonEnd_full, 
                        alternative = "two.sided", var.equal = TRUE, paired = TRUE)
lesson_ttest_modalitybaseline1 <- t.test(HybridLessonBase_full, InpersonLessonBase_full, 
                        alternative = "less", var.equal = TRUE, paired = TRUE)
lesson_ttest_modalityendline1 <- t.test(HybridLessonEnd_full, InpersonLessonEnd_full, 
                        alternative = "less", var.equal = TRUE, paired = TRUE)
lesson_ttest_timepointhybrid2 <- t.test(HybridLessonEnd_full, HybridLessonBase_full, 
                        alternative = "two.sided", var.equal = TRUE, paired = TRUE)
lesson_ttest_timepointinperson2 <- t.test(InpersonLessonEnd_full, InpersonLessonBase_full, 
                        alternative = "two.sided", var.equal = TRUE, paired = TRUE)

print(lesson_ttest_modalitybaseline2) #Simple effect of modality (in-person vs. hybrid) at baseline, TWO-TAILED
print(lesson_ttest_modalityendline2) #Simple effect of modality (in-person vs. hybrid) at endline, TWO-TAILED
print(lesson_ttest_modalitybaseline1) #Simple effect of modality (in-person vs. hybrid) at baseline, ONE-TAILED
print(lesson_ttest_modalityendline1) #Simple effect of modality (in-person vs. hybrid) at endline, ONE-TAILED
print(lesson_ttest_timepointhybrid2) #Simple effect of timepoint (baseline vs. endline) in the hybrid itap, TWO-TAILED
print(lesson_ttest_timepointinperson2) #Simple effect of timepoint (in-person vs. hybrid) in the in-person itap, TWO-TAILED

#Calculate effect sizes and CIs
#Modality (main effect)
cohend_lessonmodality <- estimate_mdiff_paired(
comparison_mean=mean(InpersonLessonCombined_full), 
comparison_sd=sd(InpersonLessonCombined_full), reference_mean=mean(HybridLessonCombined_full), 
reference_sd=sd(HybridLessonCombined_full), n=76, correlation=cor(InpersonLessonCombined_full, HybridLessonCombined_full), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_lessonmodality_d <- cohend_lessonmodality$es_smd$effect_size[1]
cohend_lessonmodality_LL <- cohend_lessonmodality$es_smd$LL[1]
cohend_lessonmodality_UL <- cohend_lessonmodality$es_smd$UL[1]

#Timepoint (main effect)
cohend_lessontimepoint <- estimate_mdiff_paired(
comparison_mean=mean(EndlineLessonCombined_full), 
comparison_sd=sd(EndlineLessonCombined_full), reference_mean=mean(BaselineLessonCombined_full), 
reference_sd=sd(BaselineLessonCombined_full), n=76, correlation=cor(EndlineLessonCombined_full, BaselineLessonCombined_full), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_lessontimepoint_d <- cohend_lessontimepoint$es_smd$effect_size[1]
cohend_lessontimepoint_LL <- cohend_lessontimepoint$es_smd$LL[1]
cohend_lessontimepoint_UL <- cohend_lessontimepoint$es_smd$UL[1]

#Modality*Timepoint (interaction)
diffs_lessoninperson <- InpersonLessonEnd_full - InpersonLessonBase_full #difference of differences
diffs_lessonhybrid <- HybridLessonEnd_full - HybridLessonBase_full #difference of differences

cohend_lessoninteract <- estimate_mdiff_paired(
comparison_mean=mean(diffs_lessoninperson),
comparison_sd=sd(diffs_lessoninperson), 
reference_mean=mean(diffs_lessonhybrid), 
reference_sd=sd(diffs_lessonhybrid), n=76, 
correlation=cor(diffs_lessoninperson, diffs_lessonhybrid), conf_level=0.95)

# Extract Cohen's d (d_average), LL, and UL
cohend_lessoninteract_d <- cohend_lessoninteract$es_smd$effect_size[1]
cohend_lessoninteract_LL <- cohend_lessoninteract$es_smd$LL[1]
cohend_lessoninteract_UL <- cohend_lessoninteract$es_smd$UL[1]

# Print the result
print(paste("Cohen's d (Main effect of modality, In-person vs. Hybrid):", 
round(cohend_lessonmodality_d, 2), 
" [95% CI:", round(cohend_lessonmodality_LL, 2), ",", round(cohend_lessonmodality_UL, 2), "]"))

print(paste("Cohen's d (Main effect of timepoint, Endline vs. Baseline):", 
round(cohend_lessontimepoint_d, 2), 
" [95% CI:", round(cohend_lessontimepoint_LL, 2), ",", round(cohend_lessontimepoint_UL, 2), "]"))

print(paste("Cohen's d (Overall interaction):", 
round(cohend_lessoninteract_d, 2), 
" [95% CI:", round(cohend_lessoninteract_LL, 2), ",", round(cohend_lessoninteract_UL, 2), "]"))



```

```{r}
# run Bayesian version of analysis above 
BfData <- lessonData
BfData$Modality = as.factor(BfData$Modality)
BfData$Timepoint <- as.factor(BfData$Timepoint)
BfData$ID <- as.factor(BfData$ID)
bf <- anovaBF(lessonScores ~ Timepoint*Modality +ID , data=BfData, whichRandom = 'ID')
print(bf) # print the results of the bayesian analysis
bf[4] / bf[3] # print the bayes factor for the interaction effect 
```

## Usefulness: descriptives of ratings, collected at endlines only

Creating new outcome variables for visit usefulness based on modality rather than ITaP session

```{r}
data$HybridVisitUsefulness <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$ItaP3VisitUseful, 
                              data$ItaP4VisitUseful)
data$InpersonVisitUsefulness <- ifelse(data$ITaP4Mod == "Hybrid", 
                              data$ItaP3VisitUseful, 
                              data$ItaP4VisitUseful)
data$HybridCompUsefulness <- ifelse(data$ITaP3Mod == "Hybrid", 
                              data$ITaP3CompUseful, 
                              data$ITaP4CompUseful)
data$InpersonCompUsefulness <- ifelse(data$ITaP4Mod == "Hybrid", 
                              data$ITaP3CompUseful, 
                              data$ITaP4CompUseful)

```

Tables of results by modality (all respondents, regardless of whether they rated both modalities)

```{r}

##Overall usefulness of the Hybrid Visit (regardless of which ITaP)
HybridVisitUseful_tibble <- data %>%
  filter(!is.na(HybridVisitUsefulness)) %>% 
  mutate(HybridVisitUsefulness = factor(HybridVisitUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(HybridVisitUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(HybridVisitUseful_tibble)

##Overall usefulness of the In-person Visit (regardless of which ITaP)
InpersonVisitUseful_tibble <- data %>%
  filter(!is.na(InpersonVisitUsefulness)) %>% 
  mutate(InpersonVisitUsefulness = factor(InpersonVisitUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(InpersonVisitUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(InpersonVisitUseful_tibble)

##Overall usefulness of the Hybrid component as a whole (regardless of which ITaP)
HybridCompUseful_tibble <- data %>%
  filter(!is.na(HybridCompUsefulness)) %>% 
  mutate(HybridCompUsefulness = factor(HybridCompUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(HybridCompUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(HybridCompUseful_tibble)

##Overall usefulness of the In-person component as a whole (regardless of which ITaP)
InpersonCompUseful_tibble <- data %>%
  filter(!is.na(InpersonCompUsefulness)) %>% 
  mutate(InpersonCompUsefulness = factor(InpersonCompUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(InpersonCompUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(InpersonCompUseful_tibble)

##Usefulness of the ITaP3 Questioning Visit (overall and by modality)
ITaP3VisitUseful_tibble_ungrouped <- data %>%
  mutate(ItaP3VisitUseful = factor(ItaP3VisitUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ItaP3VisitUseful)) %>%
  count(ItaP3VisitUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2)) %>%
  mutate(ITaP3Mod = "Overall") # Add an 'Overall' category for the ungrouped data
ITaP3VisitUseful_tibble <- data %>%
  mutate(ItaP3VisitUseful = factor(ItaP3VisitUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ItaP3VisitUseful)) %>% 
  group_by(ITaP3Mod) %>%
  count(ItaP3VisitUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2))
ITaP3VisitUseful_tibble <- bind_rows(ITaP3VisitUseful_tibble_ungrouped, ITaP3VisitUseful_tibble) %>%
    select(ITaP3Mod, everything())
print(ITaP3VisitUseful_tibble)


##Usefulness of the ITaP4 Scaffolding Visit (overall and by modality)
ITaP4VisitUseful_tibble_ungrouped <- data %>%
  mutate(ItaP4VisitUseful = factor(ItaP4VisitUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ItaP4VisitUseful)) %>%
  count(ItaP4VisitUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2)) %>%
  mutate(ITaP4Mod = "Overall") # Add an 'Overall' category for the ungrouped data
ITaP4VisitUseful_tibble <- data %>%
  mutate(ItaP4VisitUseful = factor(ItaP4VisitUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ItaP4VisitUseful)) %>% 
  group_by(ITaP4Mod) %>%
  count(ItaP4VisitUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2))
ITaP4VisitUseful_tibble <- bind_rows(ITaP4VisitUseful_tibble_ungrouped, ITaP4VisitUseful_tibble) %>%
    select(ITaP4Mod, everything())
print(ITaP4VisitUseful_tibble)

#Usefulness of the ITaP3 Questioning Component as a whole (overall and by modality)
ITaP3ComponentUseful_tibble_ungrouped <- data %>%
  mutate(ITaP3CompUseful = factor(ITaP3CompUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ITaP3CompUseful)) %>%
  count(ITaP3CompUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2)) %>%
  mutate(ITaP3Mod = "Overall") # Add an 'Overall' category for the ungrouped data
ITaP3ComponentUseful_tibble <- data %>%
  mutate(ITaP3CompUseful = factor(ITaP3CompUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ITaP3CompUseful)) %>% 
  group_by(ITaP3Mod) %>%
  count(ITaP3CompUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2))
ITaP3ComponentUseful_tibble <- bind_rows(ITaP3ComponentUseful_tibble_ungrouped, ITaP3ComponentUseful_tibble) %>%
    select(ITaP3Mod, everything())
print(ITaP3ComponentUseful_tibble)

#Usefulness of the ITaP4 Scaffolding Component as a whole (overall and by modality)
ITaP4ComponentUseful_tibble_ungrouped <- data %>%
  mutate(ITaP4CompUseful = factor(ITaP4CompUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ITaP4CompUseful)) %>%
  count(ITaP4CompUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2)) %>%
  mutate(ITaP4Mod = "Overall") # Add an 'Overall' category for the ungrouped data
ITaP4ComponentUseful_tibble <- data %>%
  mutate(ITaP4CompUseful = factor(ITaP4CompUseful, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  filter(!is.na(ITaP4CompUseful)) %>% 
  group_by(ITaP4Mod) %>%
  count(ITaP4CompUseful) %>%
  mutate(`% of respondents within modality` = round(n / sum(n) * 100, 2))
ITaP4ComponentUseful_tibble <- bind_rows(ITaP4ComponentUseful_tibble_ungrouped, ITaP4ComponentUseful_tibble) %>%
    select(ITaP4Mod, everything())
print(ITaP4ComponentUseful_tibble)
```

Create bar charts showing frequencies of visit/component usefulness responses split by modality, for the WHOLE sample

```{r}
HybridVisitUseful_tibble_v2 <- HybridVisitUseful_tibble %>%
  mutate('ITaP modality' = "Hybrid") %>%
  rename("Visit Usefulness" = 'HybridVisitUsefulness')
InpersonVisitUseful_tibble_v2 <- InpersonVisitUseful_tibble %>%
  mutate('ITaP modality' = "In person")  %>%
  rename("Visit Usefulness" = 'InpersonVisitUsefulness')
Bothmodalities_Visituseful_tibble <- bind_rows(HybridVisitUseful_tibble_v2, InpersonVisitUseful_tibble_v2) #combine the two modalities' tibbles into one, and add modality as an extra column
VisitUsefulness_plot_bothmod <- ggplot(Bothmodalities_Visituseful_tibble, aes(x = `Visit Usefulness`, y = `% of respondents`, fill = `ITaP modality`)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete() +
  theme_minimal() +
  xlab("Visit Usefulness") + ylab("% of respondents")

HybridCompUseful_tibble_v2 <- HybridCompUseful_tibble %>%
  mutate('ITaP modality' = "Hybrid") %>%
  rename("Component Usefulness" = 'HybridCompUsefulness')
InpersonCompUseful_tibble_v2 <- InpersonCompUseful_tibble %>%
  mutate('ITaP modality' = "In person")  %>%
  rename("Component Usefulness" = 'InpersonCompUsefulness') %>%
  add_row(`Component Usefulness` = "Not at all useful", n = 0, `% of respondents` = 0, `ITaP modality` = "In person") %>% #manually add a 'not at all useful' column for in-person since frequency was zero
  mutate(`Component Usefulness` = factor(`Component Usefulness`, 
                                         levels = c("Not at all useful", "Slightly useful", 
                                                    "Moderately useful", "Very useful", 
                                                    "Extremely useful"))) %>%
  arrange(`Component Usefulness`)
Bothmodalities_Compuseful_tibble <- bind_rows(HybridCompUseful_tibble_v2, InpersonCompUseful_tibble_v2) #combine the two modalities' tibbles into one, and add modalitty as an extra column
CompUsefulness_plot_bothmod <- ggplot(Bothmodalities_Compuseful_tibble, aes(x = `Component Usefulness`, y = `% of respondents`, fill = `ITaP modality`)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete() +
  theme_minimal() +
  xlab("ITaP Component Usefulness") + ylab("% of respondents")
VisitUsefulness_plot_bothmod
CompUsefulness_plot_bothmod
```

```{r}
#Create data frames for only those participants who gave ratings for both ITaPs

##Overall usefulness of the Hybrid Visit (regardless of which ITaP) for this subset of people
HybridVisitUseful_tibble_bothitaps <- data %>%
  filter(!is.na(HybridVisitUsefulness) & !is.na(InpersonVisitUsefulness))  %>% 
  mutate(HybridVisitUsefulness = factor(HybridVisitUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(HybridVisitUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(HybridVisitUseful_tibble_bothitaps)

##Overall usefulness of the In-person Visit (regardless of which ITaP) for this subset of people
InpersonVisitUseful_tibble_bothitaps <- data %>%
  filter(!is.na(HybridVisitUsefulness) & !is.na(InpersonVisitUsefulness)) %>% 
  mutate(InpersonVisitUsefulness = factor(InpersonVisitUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(InpersonVisitUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(InpersonVisitUseful_tibble_bothitaps)

##Overall usefulness of the Hybrid Component (regardless of which ITaP) for this subset of people
HybridCompUseful_tibble_bothitaps <- data %>%
  filter(!is.na(HybridCompUsefulness) & !is.na(InpersonCompUsefulness))  %>% 
  mutate(HybridCompUsefulness = factor(HybridCompUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(HybridCompUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(HybridCompUseful_tibble_bothitaps)

##Overall usefulness of the In-person Component (regardless of which ITaP) for this subset of people
InpersonCompUseful_tibble_bothitaps <- data %>%
  filter(!is.na(HybridCompUsefulness) & !is.na(InpersonCompUsefulness)) %>% 
  mutate(InpersonCompUsefulness = factor(InpersonCompUsefulness, levels = c("Not at all useful", "Slightly useful", "Moderately useful", "Very useful", "Extremely useful"))) %>% # Reorder response options
  count(InpersonCompUsefulness) %>%
  mutate(`% of respondents` = round(n / sum(n) * 100, 2))
print(InpersonCompUseful_tibble_bothitaps)
```

Create bar charts showing frequencies of visit/component usefulness responses split by modality, for ONLY those who rated usefulness of both ITaPs

```{r}
HybridVisitUseful_tibble_bothitaps_v2 <- HybridVisitUseful_tibble_bothitaps %>%
  mutate('ITaP modality' = "Hybrid") %>%
  rename("Visit Usefulness" = 'HybridVisitUsefulness')
InpersonVisitUseful_tibble_bothitaps_v2 <- InpersonVisitUseful_tibble_bothitaps %>%
  mutate('ITaP modality' = "In person")  %>%
  rename("Visit Usefulness" = 'InpersonVisitUsefulness')
Bothmodalities_Visituseful_tibble_bothitaps <- bind_rows(HybridVisitUseful_tibble_bothitaps_v2, InpersonVisitUseful_tibble_bothitaps_v2) #combine the two modalities' tibbles into one, and add modality as an extra column
VisitUsefulness_plot_bothmod_bothitaps <- ggplot(Bothmodalities_Visituseful_tibble_bothitaps, aes(x = `Visit Usefulness`, y = `% of respondents`, fill = `ITaP modality`)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete() +
  theme_minimal() +
  xlab("Visit Usefulness") + ylab("% of respondents")

HybridCompUseful_tibble_bothtaps_v2 <- HybridCompUseful_tibble_bothitaps %>%
  mutate('ITaP modality' = "Hybrid") %>%
  rename("Component Usefulness" = 'HybridCompUsefulness') %>%
  add_row(`Component Usefulness` = "Not at all useful", n = 0, `% of respondents` = 0, `ITaP modality` = "In person") %>% #manually add a 'not at all useful' column for in-person since frequency was zero
  mutate(`Component Usefulness` = factor(`Component Usefulness`, 
                                         levels = c("Not at all useful", "Slightly useful", 
                                                    "Moderately useful", "Very useful", 
                                                    "Extremely useful"))) %>%
  arrange(`Component Usefulness`)
InpersonCompUseful_tibble_bothitaps_v2 <- InpersonCompUseful_tibble_bothitaps %>%
  mutate('ITaP modality' = "In person")  %>%
  rename("Component Usefulness" = 'InpersonCompUsefulness') %>%
  add_row(`Component Usefulness` = "Not at all useful", n = 0, `% of respondents` = 0, `ITaP modality` = "In person") %>% #manually add a 'not at all useful' column for in-person since frequency was zero
  mutate(`Component Usefulness` = factor(`Component Usefulness`, 
                                         levels = c("Not at all useful", "Slightly useful", 
                                                    "Moderately useful", "Very useful", 
                                                    "Extremely useful"))) %>%
  arrange(`Component Usefulness`)
Bothmodalities_Compuseful_tibble_bothitaps_v2 <- bind_rows(HybridCompUseful_tibble_bothtaps_v2, InpersonCompUseful_tibble_bothitaps_v2) #combine the two modalities' tibbles into one, and add modality as an extra column
CompUsefulness_plot_bothmod_bothitaps <- ggplot(Bothmodalities_Compuseful_tibble_bothitaps_v2, aes(x = `Component Usefulness`, y = `% of respondents`, fill = `ITaP modality`)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_x_discrete() +
  theme_minimal() +
  xlab("ITaP Component Usefulness") + ylab("% of respondents")
VisitUsefulness_plot_bothmod_bothitaps
CompUsefulness_plot_bothmod_bothitaps
ggsave(filename = "visitusefulnessbothitaps.jpg", plot = VisitUsefulness_plot_bothmod_bothitaps, dpi = 300, quality = 100)
ggsave(filename = "itapusefulnessbothitaps.jpg", plot = CompUsefulness_plot_bothmod_bothitaps, dpi = 300, quality = 100)

```

```{r}
#Recode data as ordinal values in preparation for inferential tests
data <- data %>%
  mutate(HybridVisitUsefulness_numeric = recode(HybridVisitUsefulness,
                                    "Not at all useful" = 1,
                                    "Slightly useful" = 2,
                                    "Moderately useful" = 3,
                                    "Very useful" = 4,
                                    "Extremely useful" = 5)) %>%
  mutate(InpersonVisitUsefulness_numeric = recode(InpersonVisitUsefulness,
                                    "Not at all useful" = 1,
                                    "Slightly useful" = 2,
                                    "Moderately useful" = 3,
                                    "Very useful" = 4,
                                    "Extremely useful" = 5)) %>%
  mutate(HybridCompUsefulness_numeric = recode(HybridCompUsefulness,
                                    "Not at all useful" = 1,
                                    "Slightly useful" = 2,
                                    "Moderately useful" = 3,
                                    "Very useful" = 4,
                                    "Extremely useful" = 5)) %>%
  mutate(InpersonCompUsefulness_numeric = recode(InpersonCompUsefulness,
                                    "Not at all useful" = 1,
                                    "Slightly useful" = 2,
                                    "Moderately useful" = 3,
                                    "Very useful" = 4,
                                    "Extremely useful" = 5)) %>%
mutate(HybridVisitUsefulness_numeric = as.numeric(HybridVisitUsefulness_numeric), InpersonVisitUsefulness_numeric = as.numeric(InpersonVisitUsefulness_numeric), HybridCompUsefulness_numeric = as.numeric(HybridCompUsefulness_numeric), InpersonCompUsefulness_numeric = as.numeric(InpersonCompUsefulness_numeric))

VisitusefulnessData_both <- data %>%
  filter(!is.na(HybridVisitUsefulness_numeric) & !is.na(InpersonVisitUsefulness_numeric)) %>%
  select(ID, HybridVisitUsefulness_numeric, InpersonVisitUsefulness_numeric)
CompusefulnessData_both <- data %>%
  filter(!is.na(HybridCompUsefulness_numeric) & !is.na(InpersonCompUsefulness_numeric)) %>%
  select(ID, HybridCompUsefulness_numeric, InpersonCompUsefulness_numeric)
```

```{r}
# Run Wilcoxon signed-rank test and rank-biserial correlation for effect of modality on VISIT usefulness - only for those people who rated both ITaPs
visitusefulnesswilcoxon <- wilcox.test(VisitusefulnessData_both$HybridVisitUsefulness_numeric, 
                           VisitusefulnessData_both$InpersonVisitUsefulness, 
                           paired = TRUE)

print(visitusefulnesswilcoxon)

# Run Wilcoxon signed-rank test and rank-biserial correlation for effect of modality on COMPONENT usefulness - only for those people who rated both ITaPs
compusefulnesswilcoxon <- wilcox.test(CompusefulnessData_both$HybridCompUsefulness_numeric, 
                           CompusefulnessData_both$InpersonCompUsefulness, 
                           paired = TRUE)

print(compusefulnesswilcoxon)
```

## Numbers of trainees at each timepoint for the main outcome measures

```{r}
nSummary <- matrix(NA,6,7)
nSummary[1,1] <- sum(!is.na(as.numeric(data$HybridEffBaseScore))) # column 1 is Hybrid baseline
nSummary[1,2] <- sum((!is.na(as.numeric(data$HybridEffEndScore)))) # column 2 is Hybrid endline
nSummary[1,3] <- sum((!is.na(as.numeric(data$InpersonEffBaseScore)))) # column 3 is In-person baseline 
nSummary[1,4] <- sum((!is.na(as.numeric(data$InpersonEffEndScore)))) # column 4 is In-person endline 
nSummary[1,5] <- sum(((!is.na(as.numeric(data$HybridEffBase))) + (!is.na(as.numeric(data$HybridEffEnd))) + (!is.na(as.numeric(data$InpersonEffBase))) + (!is.na(as.numeric(data$InpersonEffEnd)))) == 4) # column 5 is for total number of participants with data at both modalities and at both timepoints
nSummary[1,6] <- sum(((!is.na(as.numeric(data$HybridEffEnd))) + (!is.na(as.numeric(data$InpersonEffEnd)))) == 2) # column 6 is for total number of participants with data at both endpoints
nSummary[1,7] <- ifelse(as.numeric(nSummary[1,6]) >= 1.2 * as.numeric(nSummary[1,5]), "Yes", "No")
nSummary[2,1] <- sum(!is.na(as.numeric(data$HybridQuizBaseScore))) # column 1 is Hybrid baseline
nSummary[2,2] <- sum((!is.na(as.numeric(data$HybridQuizEndScore)))) # column 2 is Hybrid endline
nSummary[2,3] <- sum((!is.na(as.numeric(data$InpersonQuizBaseScore)))) # column 3 is In-person baseline 
nSummary[2,4] <- sum((!is.na(as.numeric(data$InpersonQuizEndScore)))) # column 4 is In-person endline 
nSummary[2,5] <- sum(((!is.na(as.numeric(data$HybridQuizBaseScore))) + (!is.na(as.numeric(data$HybridQuizEndScore))) + (!is.na(as.numeric(data$InpersonQuizBaseScore))) + (!is.na(as.numeric(data$InpersonQuizEndScore)))) == 4) # column 5 is for total number of participants with data at both modalities and at both timepoints
nSummary[2,6] <- sum(((!is.na(as.numeric(data$HybridQuizEndScore))) + (!is.na(as.numeric(data$InpersonQuizEndScore)))) == 2) # column 6 is for total number of participants with data at both endpoints
nSummary[2,7] <- ifelse(as.numeric(nSummary[2,6]) >= 1.2 * as.numeric(nSummary[2,5]), "Yes", "No")
nSummary[3,1] <- paste("NA") #frequencies for digital approximations not calculated as ITaP4 data not coded due to lack of endlines
nSummary[3,2] <- paste("NA")
nSummary[3,3] <- paste("NA")
nSummary[3,4] <- paste("NA")
nSummary[3,5] <- paste("NA")
nSummary[3,6] <- paste("NA")
nSummary[3,7] <- paste("NA")
nSummary[4,1] <- sum(!is.na(as.numeric(data$HybridLessonBaseScore))) # column 1 is Hybrid baseline
nSummary[4,2] <- sum((!is.na(as.numeric(data$HybridLessonEndScore)))) # column 2 is Hybrid endline
nSummary[4,3] <- sum((!is.na(as.numeric(data$InpersonLessonBaseScore)))) # column 3 is In-person baseline 
nSummary[4,4] <- sum((!is.na(as.numeric(data$InpersonLessonEndScore)))) # column 4 is In-person endline 
nSummary[4,5] <- sum(((!is.na(as.numeric(data$HybridLessonBaseScore))) + (!is.na(as.numeric(data$HybridLessonEndScore))) + (!is.na(as.numeric(data$InpersonLessonBaseScore))) + (!is.na(as.numeric(data$InpersonLessonEndScore)))) == 4) # column 5 is for total number of participants with data at both modalities and at both timepoints
nSummary[4,6] <- sum(((!is.na(as.numeric(data$HybridLessonEndScore))) + (!is.na(as.numeric(data$InpersonLessonEndScore)))) == 2) # column 6 is for total number of participants with data at both endpoints
nSummary[4,7] <- ifelse(as.numeric(nSummary[4,6]) >= 1.2 * as.numeric(nSummary[4,5]), "Yes", "No")
nSummary[5,1] <- paste("")
nSummary[5,2] <- sum(!is.na(as.numeric(data$HybridVisitUsefulness_numeric)))
nSummary[5,3] <- paste("")
nSummary[5,4] <- sum(!is.na(as.numeric(data$InpersonVisitUsefulness_numeric)))
nSummary[5,5] <- paste("")
nSummary[5,6] <- sum(((!is.na(as.numeric(data$HybridVisitUsefulness_numeric))) + (!is.na(as.numeric(data$InpersonVisitUsefulness_numeric)))) == 2) # column 6 is for total number of participants with data at both endpoints
nSummary[5,7] <- paste("")
nSummary[6,1] <- paste("")
nSummary[6,2] <- sum(!is.na(as.numeric(data$HybridCompUsefulness_numeric)))
nSummary[6,3] <- paste("")
nSummary[6,4] <- sum(!is.na(as.numeric(data$InpersonCompUsefulness_numeric)))
nSummary[6,5] <- paste("")
nSummary[6,6] <- sum(((!is.na(as.numeric(data$HybridCompUsefulness_numeric))) + (!is.na(as.numeric(data$InpersonCompUsefulness_numeric)))) == 2) # column 6 is for total number of participants with data at both endpoints
nSummary[6,7] <- paste("")
rownames(nSummary) <- c("Number of valid participants, self-efficacy", "Number of valid participants, quizzes","Number of valid participants, digital approximations", "Number of valid participants, lesson plans", "Number of valid participants, visit usefulness", "Number of valid participants, component usefulness")
colnames(nSummary) <- c("Hybrid Baseline","Hybrid Endline","In-person Baseline","In-person Endline","Both Modalities and Timepoints", "Both Endpoints", "Is sample with data at both endpoints >= 20% larger than sample with full data?")
knitr::kable(nSummary, "simple")
```
